{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'3.6.7 (default, Oct 21 2018, 04:56:05) \\n[GCC 5.4.0 20160609]'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sys.version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import swifter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('0.25.1', '1.16.3', '0.292')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.__version__, np.__version__,swifter.__version__"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## dataframe.apply VS series.apply VS swifter.apply"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1 = pd.DataFrame({\n",
    "    'x': np.random.random(size=30000000)\n",
    "})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "apply"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5000156711783587"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1['x'].mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**vectorizable functions**: winner is swifter series.apply, by a small margin"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def apply_to_array(arr):       \n",
    "    return np.add(np.multiply(arr,2),3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def apply_to_element(elem):\n",
    "    return (elem*2)+3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 172 ms, sys: 376 ms, total: 548 ms\n",
      "Wall time: 548 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "#dataframe.apply\n",
    "df1[['x']].apply(apply_to_array)\n",
    "True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 5.8 s, sys: 576 ms, total: 6.38 s\n",
      "Wall time: 6.37 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# series.apply\n",
    "df1['x'].apply(apply_to_element)\n",
    "True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 140 ms, sys: 148 ms, total: 288 ms\n",
      "Wall time: 284 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# swifter dataframe.apply\n",
    "df1[['x']].swifter.apply(apply_to_array)\n",
    "True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 72 ms, sys: 120 ms, total: 192 ms\n",
      "Wall time: 190 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# swifter series.apply\n",
    "df1['x'].swifter.apply(apply_to_element)\n",
    "True"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**string functions:** winner is regular series.apply; swifter.apply fails miserably"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "def num_to_str(num):\n",
    "    return str(num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 22.8 s, sys: 952 ms, total: 23.8 s\n",
      "Wall time: 23.8 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# series.apply\n",
    "df1['x'].apply(num_to_str)\n",
    "True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a758eff8357949e2940fd6bd8288e615",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, description='Dask Apply', max=32, style=ProgressStyle(description_width='i…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "CPU times: user 1min 40s, sys: 5.66 s, total: 1min 45s\n",
      "Wall time: 2min 17s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# swifter series.apply\n",
    "df1['x'].swifter.apply(num_to_str)\n",
    "True"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**if-then-else**: swifter wins by a small margin"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "def if_then_else(x):\n",
    "    if x >= 0.5:\n",
    "        return True\n",
    "    else:\n",
    "        return False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 4.15 s, sys: 428 ms, total: 4.58 s\n",
      "Wall time: 4.57 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# series.apply\n",
    "df1['x'].apply(if_then_else)\n",
    "True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fe7988e7062f45a8a077667dda60ef97",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, description='Dask Apply', max=32, style=ProgressStyle(description_width='i…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "CPU times: user 1 s, sys: 412 ms, total: 1.41 s\n",
      "Wall time: 3.83 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# swifter series.apply\n",
    "df1['x'].swifter.apply(if_then_else)\n",
    "True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.6",
   "language": "python",
   "name": "python36"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
